import json
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# HTTP request headers carrying a desktop browser (Edge on Windows 10)
# User-Agent string. The value is split across lines purely for readability;
# adjacent string literals are concatenated into a single string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0"
    ),
}

result = []

# Root against which relative notice links are resolved.
_SITE_ROOT = "https://xgc.xidian.edu.cn/"


def get_info(page_soup=None):
    """Append one record per notice found on a parsed list page to ``result``.

    Every ``li`` matched by ``div.right-con ul li`` yields a dict with the
    keys ``title`` (anchor text), ``time`` (span text), ``link`` (absolute
    URL) and ``preview`` (fixed placeholder).

    Args:
        page_soup: Parsed page to read; any object exposing a
            BeautifulSoup-style ``select`` works. When omitted, the
            module-level ``soup`` is used, so existing zero-argument calls
            keep working.

    List items without an anchor, a ``span`` or an ``href`` are skipped
    instead of raising ``AttributeError``/``TypeError``.
    """
    if page_soup is None:
        # Backward-compatible fallback: the caller rebinds the global ``soup``
        # before each call.
        page_soup = soup

    for item in page_soup.select("div.right-con ul li"):
        title_anchor = item.select_one("a")
        date_span = item.select_one("span")
        if title_anchor is None or date_span is None:
            continue

        # Prefer the anchor carrying a ``title`` attribute for the link, as
        # before, but fall back to the first anchor rather than crashing.
        link_anchor = item.select_one("a[title]")
        if link_anchor is None:
            link_anchor = title_anchor
        href = link_anchor.get("href")
        if not href:
            continue

        result.append(
            {
                # get_text() also works when the tag has nested children,
                # a case in which ``.string`` is None.
                "title": title_anchor.get_text(strip=True),
                "time": date_span.get_text(strip=True),
                # urljoin resolves "../info/x.htm", "info/x.htm" and absolute
                # URLs alike; slicing off the first two characters only
                # worked for hrefs that start with "..".
                "link": urljoin(_SITE_ROOT, href),
                "preview": "暂无",
            }
        )


# Seconds to wait for the server before giving up on a request, so that a
# stalled connection cannot hang the script indefinitely.
REQUEST_TIMEOUT = 10

# Fetch the list index page. Its pagination element is read below to learn
# how many pages exist.
res = requests.get(
    "https://xgc.xidian.edu.cn/tzgg1.htm",
    headers=headers,
    timeout=REQUEST_TIMEOUT,
)
# Fail early with a clear HTTP error; nothing below can work without this page.
res.raise_for_status()

res.encoding = "utf-8"

soup = BeautifulSoup(res.text, "html.parser")

# The pager text is expected to contain "<current>/<total>"; the part after
# the slash is taken as the total page count.
pager = soup.select_one("#fanye190188")
if pager is None:
    raise RuntimeError(
        "pagination element #fanye190188 not found; the page layout may have changed"
    )
total_page_count = int(pager.get_text().split("/")[1])
print(total_page_count)
get_info()

need_page_count = 10

# Never request more pages than exist; otherwise the range below would run
# into page numbers <= 0.
pages_to_fetch = min(need_page_count, total_page_count)

# NOTE(review): numbered pages are requested from total_page_count downwards.
# Confirm that "<total_page_count>.htm" exists on the site and is not a
# duplicate of the index page fetched above.
for page_number in range(total_page_count, total_page_count - pages_to_fetch, -1):
    res = requests.get(
        f"https://xgc.xidian.edu.cn/tzgg1/{page_number}.htm",
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )

    if not res.ok:
        # Skip an error page instead of parsing it as if it were a list page.
        print(f"第{page_number}页请求失败(HTTP {res.status_code})，已跳过")
        time.sleep(0.5)
        continue

    res.encoding = "utf-8"

    soup = BeautifulSoup(res.text, "html.parser")

    get_info()

    print(result)
    print(f"已经获取第{total_page_count-page_number+1}/{pages_to_fetch}页")

    # Pause between requests to avoid hammering the server.
    time.sleep(0.5)


# Make sure the output directory exists so the scraped data is not lost to a
# FileNotFoundError at the very end of the run.
os.makedirs("./data", exist_ok=True)

# Open in write mode: appending a second JSON array on a re-run would leave
# the file as invalid JSON.
with open("./data/data.json", "w", encoding="utf-8") as file:
    json.dump(result, file, ensure_ascii=False, indent=4)


